# Reshape the count matrix to long format (one row per feature/sample pair)
# and order the rows by sample name. Every column from the second one onwards
# is pivoted; the first column is kept as the identifier.
# NOTE(review): assumes column 1 is the only non-sample column -- confirm.
df_counts <- count_matrix %>%
  tidyr::pivot_longer(
    cols = 2:ncol(count_matrix),
    names_to = "sample",
    values_to = "n_reads"
  ) %>%
  dplyr::arrange(sample)

# Disabled: join with the sample sheet.
# df_counts <- left_join(df_samplesheet, df_counts)

# Pairwise correlation between samples. Pivoting the sorted long table back to
# wide format yields the sample columns in sorted order; the identifier column
# (position 1) is dropped so that cor() only sees the read counts.
df_correlation <- cor(
  dplyr::select(
    tidyr::pivot_wider(
      df_counts,
      names_from = "sample",
      values_from = "n_reads"
    ),
    -1
  )
)
# Tile plot of the sample-by-sample correlation matrix, with the rounded
# coefficient printed in every tile.
plot_replicate_correlation <- df_correlation %>%
  dplyr::as_tibble() %>%
  # as_tibble() drops the matrix row names; a correlation matrix has the same
  # samples (in the same order) on rows and columns, so reuse the column names.
  dplyr::mutate(sample1 = colnames(.)) %>%
  tidyr::pivot_longer(
    cols = !sample1,
    names_to = "sample2",
    values_to = "cor_coef"
  ) %>%
  ggplot(aes(x = sample1, y = sample2, fill = cor_coef)) +
  geom_tile() +
  geom_text(color = grey(0.4), aes(label = round(cor_coef, 2))) +
  theme_light() +
  labs(title = "", x = "", y = "") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  # Diverging scale pinned to the full [-1, 1] range so that colours are
  # comparable between plots regardless of the observed coefficients.
  scale_fill_gradientn(
    colours = c("#E7298A", grey(0.9), "#66A61E"),
    limits = c(-1, 1)
  )
plot_replicate_correlation

# ggsave() does not reliably create missing output directories (it fails in
# non-interactive runs), so make sure they exist before exporting.
invisible(lapply(
  c("../plots/correlation/pdf", "../plots/correlation/png"),
  dir.create,
  recursive = TRUE, showWarnings = FALSE
))
ggsave(
  "../plots/correlation/pdf/correlation_samples.pdf",
  plot = plot_replicate_correlation, width = 12, height = 12
)
ggsave(
  "../plots/correlation/png/correlation_samples.png",
  plot = plot_replicate_correlation, width = 12, height = 12
)
# Number of colour bins for the pheatmap colour scale; the breaks themselves
# are derived from this value where the heatmaps are drawn. See
# https://bioinformatics.stackexchange.com/questions/22502/manually-set-range-of-colour-scale-in-pheatmap-in-r
color.divisions <- 100

# Row annotation for the clustered heatmaps. The values are hard-coded for
# exactly 50 samples and are matched to the rows of df_correlation BY POSITION:
#   rows  1-32: condition "cont", generations 0-7, replicates 1-4 per generation
#   rows 33-50: condition "LD",   generations 0-5, replicates 5-7 per generation
# NOTE(review): this is only correct if the row order of df_correlation follows
# exactly this layout -- confirm against the sample names.
stopifnot(
  "annotation_days is hard-coded for 50 samples; update it to match df_correlation" =
    length(unique(row.names(df_correlation))) == 50L
)
annotation_days <- data.frame(
  row.names = unique(row.names(df_correlation)),
  generation = as.character(c(rep(0:7, each = 4), rep(0:5, each = 3))),
  condition = c(rep("cont", 32), rep("LD", 18)),
  replicate = as.character(c(rep(1:4, times = 8), rep(5:7, times = 6)))
)
# Custom annotation colours for pheatmap, see
# https://stackoverflow.com/questions/41628450/r-pheatmap-change-annotation-colors-and-prevent-graphics-window-from-popping-up
# choose colors for replicate that make difference between the two conditions clearly obvious --> four colours that are similar for HC, three that are similar for LD, let replicate 3 of LD _pop_
# choose gradient of colors for generations
# e.g. Tol from https://davidmathlogic.com/colorblind/#%23D81B60-%231E88E5-%23FFC107-%23004D40

# Alternative palettes. Neither is used by the heatmaps drawn from
# annotation_color_list below; they are kept because code outside this section
# may still refer to them.
tol <- c("#882255ff", "#aa4499ff", "#cc6677ff", "#ddcc77ff", "#88cceeff", "#44aa99ff", "#117733ff", "#332288ff")
okabe_replic <- setNames(
  c("#f0e442ff", "#e69f00ff", "#d55e00ff", "#cc79a7ff", "#009e73ff", "#56b4e9ff", "#0072b2ff"),
  unique(annotation_days$replicate)
)

# Generation colours: one colour per distinct generation, assigned in order of
# first appearance in annotation_days.
okabe <- c("#fff2ecff", "#f0e442ff", "#f5a700ff", "#9a4400ff", "#cc79a7ff", "#56b4e9ff", "#0072b2ff", "#003450ff")
okabe_gen <- setNames(okabe, unique(annotation_days$generation))

# Replicate colours: the first four shades go to the first four distinct
# replicates, the remaining three to the last three (in annotation_days these
# are the "cont" and the "LD" replicates respectively).
based_on_col <- setNames(
  c("#009affff", "#007acbff", "#005b98ff", "#003d66ff", "#ff9dd5ff", "#cc84aeff", "#9a5b83ff"),
  unique(annotation_days$replicate)
)

# Colour mapping handed to pheatmap(annotation_colors = ...); the list names
# must match the column names of annotation_days.
annotation_color_list <- list(
  condition = c("cont" = "#005a96ff", "LD" = "#c57ba5ff"),
  replicate = based_on_col,
  generation = okabe_gen
)
# Clustered heatmaps of the sample correlation matrix, exported once with and
# once without the coefficients printed in the cells.

# ggsave() does not reliably create missing output directories (it fails in
# non-interactive runs), so make sure they exist before exporting.
invisible(lapply(
  c("../plots/correlation/png", "../plots/correlation/pdf"),
  dir.create,
  recursive = TRUE, showWarnings = FALSE
))

# Version 1: coefficients shown in the cells. The column dendrogram is hidden
# (treeheight_col = 0), rows and columns are each cut into 3 clusters, and the
# breaks pin the colour scale to the full [-1, 1] range.
p <- pheatmap(
  df_correlation,
  display_numbers = TRUE,
  treeheight_col = 0,
  cutree_rows = 3,
  cutree_cols = 3,
  annotation_row = annotation_days,
  annotation_colors = annotation_color_list,
  breaks = seq(-1, 1, length.out = (color.divisions + 1))
)
p
ggsave(
  "../plots/correlation/png/correlation_samples_clustering.png",
  plot = p, width = 11.5, height = 8
)
ggsave(
  "../plots/correlation/pdf/correlation_samples_clustering.pdf",
  plot = p, width = 11.5, height = 8
)

# Version 2: identical settings, but without the numbers in the cells.
p <- pheatmap(
  df_correlation,
  display_numbers = FALSE,
  treeheight_col = 0,
  cutree_rows = 3,
  cutree_cols = 3,
  annotation_row = annotation_days,
  annotation_colors = annotation_color_list,
  breaks = seq(-1, 1, length.out = (color.divisions + 1))
)
p
ggsave(
  "../plots/correlation/png/correlation_samples_clustering_woNumbers.png",
  plot = p, width = 11.5, height = 8
)
ggsave(
  "../plots/correlation/pdf/correlation_samples_clustering_woNumbers.pdf",
  plot = p, width = 11.5, height = 8
)
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=sv_SE.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=sv_SE.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=sv_SE.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=sv_SE.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Stockholm
## tzcode source: system (glibc)
##
## attached base packages:
## [1] tcltk grid stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] pheatmap_1.0.12 ggVennDiagram_1.5.2
## [3] Mfuzz_2.64.0 DynDoc_1.82.0
## [5] widgetTools_1.82.0 e1071_1.7-14
## [7] edgeR_4.2.0 limma_3.60.2
## [9] ComplexHeatmap_2.20.0 Heatplus_3.12.0
## [11] ggnewscale_0.4.10 ggrepel_0.9.5
## [13] colorblindr_0.1.0 colorspace_2.1-1
## [15] DescTools_0.99.54 DESeq2_1.44.0
## [17] SummarizedExperiment_1.34.0 Biobase_2.64.0
## [19] MatrixGenerics_1.16.0 matrixStats_1.3.0
## [21] GenomicRanges_1.56.0 GenomeInfoDb_1.40.1
## [23] IRanges_2.38.0 S4Vectors_0.42.0
## [25] BiocGenerics_0.50.0 lubridate_1.9.3
## [27] forcats_1.0.0 stringr_1.5.1
## [29] dplyr_1.1.4 purrr_1.0.2
## [31] readr_2.1.5 tidyr_1.3.1
## [33] tibble_3.2.1 tidyverse_2.0.0
## [35] ggplot2_3.5.1
##
## loaded via a namespace (and not attached):
## [1] gld_2.6.6 readxl_1.4.3 rlang_1.1.3
## [4] magrittr_2.0.3 clue_0.3-65 GetoptLong_1.0.5
## [7] compiler_4.4.1 systemfonts_1.1.0 png_0.1-8
## [10] vctrs_0.6.5 shape_1.4.6.1 pkgconfig_2.0.3
## [13] crayon_1.5.2 fastmap_1.2.0 XVector_0.44.0
## [16] labeling_0.4.3 utf8_1.2.4 rmarkdown_2.27
## [19] tzdb_0.4.0 UCSC.utils_1.0.0 ragg_1.3.2
## [22] bit_4.0.5 xfun_0.44 zlibbioc_1.50.0
## [25] cachem_1.1.0 jsonlite_1.8.8 highr_0.11
## [28] DelayedArray_0.30.1 BiocParallel_1.38.0 cluster_2.1.6
## [31] parallel_4.4.1 R6_2.5.1 bslib_0.7.0
## [34] stringi_1.8.4 RColorBrewer_1.1-3 boot_1.3-30
## [37] jquerylib_0.1.4 cellranger_1.1.0 Rcpp_1.0.12
## [40] iterators_1.0.14 knitr_1.47 Matrix_1.6-5
## [43] timechange_0.3.0 tidyselect_1.2.1 rstudioapi_0.16.0
## [46] abind_1.4-5 yaml_2.3.8 doParallel_1.0.17
## [49] codetools_0.2-19 lattice_0.22-5 withr_3.0.0
## [52] evaluate_0.23 proxy_0.4-27 circlize_0.4.16
## [55] pillar_1.9.0 tkWidgets_1.82.0 foreach_1.5.2
## [58] generics_0.1.3 vroom_1.6.5 hms_1.1.3
## [61] munsell_0.5.1 scales_1.3.0 rootSolve_1.8.2.4
## [64] class_7.3-22 glue_1.7.0 lmom_3.0
## [67] tools_4.4.1 data.table_1.15.4 locfit_1.5-9.9
## [70] Exact_3.2 mvtnorm_1.2-5 GenomeInfoDbData_1.2.12
## [73] cli_3.6.2 textshaping_0.4.0 fansi_1.0.6
## [76] expm_0.999-9 S4Arrays_1.4.1 gtable_0.3.5
## [79] sass_0.4.9 digest_0.6.35 SparseArray_1.4.8
## [82] farver_2.1.2 rjson_0.2.21 htmltools_0.5.8.1
## [85] lifecycle_1.0.4 httr_1.4.7 statmod_1.5.0
## [88] GlobalOptions_0.1.2 bit64_4.0.5 MASS_7.3-61